{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Numpy的数学统计函数"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "本节内容：\n",
    "\n",
    "#### 1、Numpy有哪些数学统计函数：  \n",
    "<table style=\"margin-left:0px; text-align:left;\">\n",
    "    <tr><th>函数名</th><th>说明</th></tr>\n",
    "    <tr><td>np.sum</td><td>所有元素的和</td></tr>\n",
    "    <tr><td>np.prod</td><td>所有元素的乘积</td></tr>\n",
    "    <tr><td>np.cumsum</td><td>元素的累积加和</td></tr>\n",
    "    <tr><td>np.cumprod</td><td>元素的累积乘积</td></tr>\n",
    "    <tr><td>np.min</td><td>最小值</td></tr>\n",
    "    <tr><td>np.max</td><td>最大值</td></tr>\n",
    "    <tr><td>np.percentile</td><td>0-100百分位数</td></tr>\n",
    "    <tr><td>np.quantile</td><td>0-1分位数</td></tr>\n",
    "    <tr><td>np.median</td><td>中位数</td></tr>\n",
    "    <tr><td>np.average</td><td>加权平均，参数可以指定weights</td></tr>\n",
    "    <tr><td>np.mean</td><td>平均值</td></tr>\n",
    "    <tr><td>np.std</td><td>标准差</td></tr>\n",
    "    <tr><td>np.var</td><td>方差</td></tr>\n",
    "</table>\n",
    "\n",
    "#### 2、怎样实现按不同的axis计算\n",
    "以上函数，都有一个参数叫做axis用于指定计算轴为行还是列，如果不指定，那么会计算所有元素的结果\n",
    "\n",
    "#### 3、实例：机器学习将数据进行标准化\n",
    "A = (A - mean(A, axis=0)) / std(A, axis=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1、Numpy的数学统计函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3],\n",
       "       [ 4,  5,  6,  7],\n",
       "       [ 8,  9, 10, 11]])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.arange(12).reshape(3,4)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "66"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.prod(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0,  1,  3,  6, 10, 15, 21, 28, 36, 45, 55, 66], dtype=int32)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.cumsum(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.cumprod(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.min(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.max(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2.75, 5.5 , 8.25])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.percentile(arr, [25, 50, 75])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2.75, 5.5 , 8.25])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.quantile(arr, [0.25, 0.5, 0.75])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5.5"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.median(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5.5"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.452052529534663"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.std(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11.916666666666666"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.var(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5.374443700857059"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# weights的shape需要和arr一样\n",
    "weights = np.random.rand(*arr.shape)\n",
    "np.average(arr, weights=weights)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2、Numpy的axis参数的用途"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "axis=0代表行、axis=1代表列\n",
    "\n",
    "对于sum/mean/media等聚合函数：\n",
    "* 理解1：axis=0代表把行消解掉，axis=1代表把列消解掉\n",
    "* 理解2：axis=0代表跨行计算，axis=1代表跨列计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3],\n",
       "       [ 4,  5,  6,  7],\n",
       "       [ 8,  9, 10, 11]])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([12, 15, 18, 21])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr.sum(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 6, 22, 38])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr.sum(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3],\n",
       "       [ 4,  6,  8, 10],\n",
       "       [12, 15, 18, 21]], dtype=int32)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr.cumsum(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  3,  6],\n",
       "       [ 4,  9, 15, 22],\n",
       "       [ 8, 17, 27, 38]], dtype=int32)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr.cumsum(axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3、实例：机器学习将数据进行标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3],\n",
       "       [ 4,  5,  6,  7],\n",
       "       [ 8,  9, 10, 11]])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "arr如果对应到现实世界的一种解释：\n",
    "* 行：每行对应一个样本数据\n",
    "* 列：每列代表样本的一个特征\n",
    "\n",
    "数据标准化：  \n",
    "* 对于机器学习、神经网络来说，不同列的量纲应该相同，训练收敛的更快；\n",
    "* 比如商品的价格是0到100元、销量是1万到10万个，这俩数字没有可比性，因此需要先都做标准化；\n",
    "* 不同列代表不同的特征，因此需要axis=0做计算\n",
    "* 标准化一般使用A = (A - mean(A, axis=0)) / std(A, axis=0)公式进行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4., 5., 6., 7.])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算每列的均值\n",
    "mean = np.mean(arr, axis=0)\n",
    "mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([3.26598632, 3.26598632, 3.26598632, 3.26598632])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算每列的方差\n",
    "std = np.std(arr, axis=0)\n",
    "std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-4., -4., -4., -4.],\n",
       "       [ 0.,  0.,  0.,  0.],\n",
       "       [ 4.,  4.,  4.,  4.]])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 计算分子，注意每行都会分别减去[4., 5., 6., 7.]，这叫做numpy的广播\n",
    "fenzi = arr-mean\n",
    "fenzi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.22474487, -1.22474487, -1.22474487, -1.22474487],\n",
       "       [ 0.        ,  0.        ,  0.        ,  0.        ],\n",
       "       [ 1.22474487,  1.22474487,  1.22474487,  1.22474487]])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result = fenzi/std\n",
    "result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 用随机数再试一次"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[99, 90, 17, 96],\n",
       "       [27, 63, 80, 61],\n",
       "       [75, 27, 85,  3]])"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr2 = np.random.randint(1, 100, size=(3,4))\n",
    "arr2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.06904497,  1.16247639, -1.41113333,  1.11249683],\n",
       "       [-1.33630621,  0.11624764,  0.62477659,  0.19990177],\n",
       "       [ 0.26726124, -1.27872403,  0.78635674, -1.3123986 ]])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result = (arr2 - np.mean(arr2,axis=0)) / np.std(arr2,axis=0)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
